{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "0p2TOW290jDl"
   },
   "outputs": [],
   "source": [
    "# Data download and unzipping\n",
    "!wget https://raw.githubusercontent.com/AakashSudhakar/2018-data-science-bowl/master/compressed_files/stage1_test.zip -c\n",
    "!wget https://raw.githubusercontent.com/AakashSudhakar/2018-data-science-bowl/master/compressed_files/stage1_train.zip -c\n",
    "\n",
    "!mkdir stage1_train stage1_test\n",
    "\n",
    "!unzip stage1_train.zip -d stage1_train/\n",
    "!unzip stage1_test.zip -d stage1_test/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "JGZ3lxTh0jDo"
   },
   "outputs": [],
   "source": [
    "# Downloading and intstalling keras\n",
    "!pip install keras"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "va0eskk-0jDv"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import random\n",
    "import sys\n",
    "import warnings\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from itertools import chain\n",
    "from skimage.io import imread, imshow, imread_collection, concatenate_images\n",
    "from skimage.transform import resize\n",
    "from skimage.morphology import label\n",
    "from keras.utils import Progbar\n",
    "\n",
    "from keras.models import Model, load_model\n",
    "from keras.layers import Input\n",
    "from keras.layers.core import Dropout, Lambda\n",
    "from keras.layers.convolutional import Conv2D, Conv2DTranspose,Convolution2D\n",
    "from keras.layers.pooling import MaxPooling2D\n",
    "from keras.layers.merge import concatenate\n",
    "from keras import backend as K\n",
    "\n",
    "warnings.filterwarnings('ignore', category=UserWarning, module='skimage')\n",
    "\n",
    "# Setting seed for reproducability\n",
    "seed = 42\n",
    "random.seed = seed\n",
    "np.random.seed = seed\n",
    "smooth = 1.\n",
    "epochs = 50"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "UyNOyw9o0jDz"
   },
   "outputs": [],
   "source": [
    "# Data Path\n",
    "TRAIN_PATH = 'stage1_train/'\n",
    "TEST_PATH = 'stage1_test/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "sKCRl3RI0jD2"
   },
   "outputs": [],
   "source": [
    "# Get train and test IDs\n",
    "train_ids = next(os.walk(TRAIN_PATH))[1]\n",
    "test_ids = next(os.walk(TEST_PATH))[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "ONv9yLve0jD6"
   },
   "outputs": [],
   "source": [
    "# Function read train images and mask return as nump array\n",
    "def read_train_data(IMG_WIDTH=256,IMG_HEIGHT=256,IMG_CHANNELS=3):\n",
    "    X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)\n",
    "    Y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool)\n",
    "    print('Getting and resizing train images and masks ... ')\n",
    "    sys.stdout.flush()\n",
    "    if os.path.isfile(\"train_img.npy\") and os.path.isfile(\"train_mask.npy\"):\n",
    "        print(\"Train file loaded from memory\")\n",
    "        X_train = np.load(\"train_img.npy\")\n",
    "        Y_train = np.load(\"train_mask.npy\")\n",
    "        return X_train,Y_train\n",
    "    a = Progbar(len(train_ids))\n",
    "    for n, id_ in enumerate(train_ids):\n",
    "        path = TRAIN_PATH + id_\n",
    "        img = imread(path + '/images/' + id_ + '.png')[:,:,:IMG_CHANNELS]\n",
    "        img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)\n",
    "        X_train[n] = img\n",
    "        mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool)\n",
    "        for mask_file in next(os.walk(path + '/masks/'))[2]:\n",
    "            mask_ = imread(path + '/masks/' + mask_file)\n",
    "            mask_ = np.expand_dims(resize(mask_, (IMG_HEIGHT, IMG_WIDTH), mode='constant', \n",
    "                                        preserve_range=True), axis=-1)\n",
    "            mask = np.maximum(mask, mask_)\n",
    "        Y_train[n] = mask\n",
    "        a.update(n)\n",
    "    np.save(\"train_img\",X_train)\n",
    "    np.save(\"train_mask\",Y_train)\n",
    "    return X_train,Y_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "mp-tgQ1K0jD-"
   },
   "outputs": [],
   "source": [
    "# Function to read test images and return as numpy array\n",
    "def read_test_data(IMG_WIDTH=256,IMG_HEIGHT=256,IMG_CHANNELS=3):\n",
    "    X_test = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)\n",
    "    sizes_test = []\n",
    "    print('\\nGetting and resizing test images ... ')\n",
    "    sys.stdout.flush()\n",
    "    if os.path.isfile(\"test_img.npy\") and os.path.isfile(\"test_size.npy\"):\n",
    "        print(\"Test file loaded from memory\")\n",
    "        X_test = np.load(\"test_img.npy\")\n",
    "        sizes_test = np.load(\"test_size.npy\")\n",
    "        return X_test,sizes_test\n",
    "    b = Progbar(len(test_ids))\n",
    "    for n, id_ in enumerate(test_ids):\n",
    "        path = TEST_PATH + id_\n",
    "        img = imread(path + '/images/' + id_ + '.png')[:,:,:IMG_CHANNELS]\n",
    "        sizes_test.append([img.shape[0], img.shape[1]])\n",
    "        img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)\n",
    "        X_test[n] = img\n",
    "        b.update(n)\n",
    "    np.save(\"test_img\",X_test)\n",
    "    np.save(\"test_size\",sizes_test)\n",
    "    return X_test,sizes_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "RqHkaWuw0jEG"
   },
   "outputs": [],
   "source": [
    "# Run-length encoding stolen from https://www.kaggle.com/rakhlin/fast-run-length-encoding-python\n",
    "def rle_encoding(x):\n",
    "    dots = np.where(x.T.flatten() == 1)[0]\n",
    "    run_lengths = []\n",
    "    prev = -2\n",
    "    for b in dots:\n",
    "        if (b>prev+1): run_lengths.extend((b + 1, 0))\n",
    "        run_lengths[-1] += 1\n",
    "        prev = b\n",
    "    return run_lengths\n",
    "\n",
    "def prob_to_rles(x, cutoff=0.5):\n",
    "    lab_img = label(x > cutoff)\n",
    "    for i in range(1, lab_img.max() + 1):\n",
    "        yield rle_encoding(lab_img == i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "f7lr0MPo0jEI"
   },
   "outputs": [],
   "source": [
    "# Iterate over the test IDs and generate run-length encodings for each seperate mask identified by skimage\n",
    "def mask_to_rle(preds_test_upsampled):\n",
    "    new_test_ids = []\n",
    "    rles = []\n",
    "    for n, id_ in enumerate(test_ids):\n",
    "        rle = list(prob_to_rles(preds_test_upsampled[n]))\n",
    "        rles.extend(rle)\n",
    "        new_test_ids.extend([id_] * len(rle))\n",
    "    return new_test_ids,rles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "xXvt-TWj0jEL"
   },
   "outputs": [],
   "source": [
    "# Metric function\n",
    "def dice_coef(y_true, y_pred):\n",
    "    y_true_f = K.flatten(y_true)\n",
    "    y_pred_f = K.flatten(y_pred)\n",
    "    intersection = K.sum(y_true_f * y_pred_f)\n",
    "    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)\n",
    "\n",
    "# Loss funtion\n",
    "def dice_coef_loss(y_true, y_pred):\n",
    "    return -dice_coef(y_true, y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "IC8bP7Sd0jEQ"
   },
   "outputs": [],
   "source": [
    "def get_unet(IMG_WIDTH=256,IMG_HEIGHT=256,IMG_CHANNELS=3):\n",
    "    inputs = Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))\n",
    "    s = Lambda(lambda x: x / 255) (inputs)\n",
    "    c1 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (s)\n",
    "    c1 = Dropout(0.1) (c1)\n",
    "    c1 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c1)\n",
    "    p1 = MaxPooling2D((2, 2)) (c1)\n",
    "    c2 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p1)\n",
    "    c2 = Dropout(0.1) (c2)\n",
    "    c2 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c2)\n",
    "    p2 = MaxPooling2D((2, 2)) (c2)\n",
    "\n",
    "    c3 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p2)\n",
    "    c3 = Dropout(0.2) (c3)\n",
    "    c3 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c3)\n",
    "    p3 = MaxPooling2D((2, 2)) (c3)\n",
    "\n",
    "    c4 = Conv2D(128, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p3)\n",
    "    c4 = Dropout(0.2) (c4)\n",
    "    c4 = Conv2D(128, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c4)\n",
    "    p4 = MaxPooling2D(pool_size=(2, 2)) (c4)\n",
    "\n",
    "    c5 = Conv2D(256, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p4)\n",
    "    c5 = Dropout(0.3) (c5)\n",
    "    c5 = Conv2D(256, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c5)\n",
    "\n",
    "    u6 = Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same') (c5)\n",
    "    u6 = concatenate([u6, c4])\n",
    "    c6 = Conv2D(128, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u6)\n",
    "    c6 = Dropout(0.2) (c6)\n",
    "    c6 = Conv2D(128, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c6)\n",
    "\n",
    "    u7 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same') (c6)\n",
    "    u7 = concatenate([u7, c3])\n",
    "    c7 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u7)\n",
    "    c7 = Dropout(0.2) (c7)\n",
    "    c7 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c7)\n",
    "\n",
    "    u8 = Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same') (c7)\n",
    "    u8 = concatenate([u8, c2])\n",
    "    c8 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u8)\n",
    "    c8 = Dropout(0.1) (c8)\n",
    "    c8 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c8)\n",
    "\n",
    "    u9 = Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same') (c8)\n",
    "    u9 = concatenate([u9, c1], axis=3)\n",
    "    c9 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u9)\n",
    "    c9 = Dropout(0.1) (c9)\n",
    "    c9 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c9)\n",
    "\n",
    "    outputs = Conv2D(1, (1, 1), activation='sigmoid') (c9)\n",
    "\n",
    "    model = Model(inputs=[inputs], outputs=[outputs])\n",
    "    model.compile(optimizer='adam',loss='binary_crossentropy', metrics=[dice_coef])\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "BIt7dBv80jEU"
   },
   "outputs": [],
   "source": [
    "# get train_data\n",
    "train_img,train_mask = read_train_data()\n",
    "\n",
    "# get test_data\n",
    "test_img,test_img_sizes = read_test_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "tgRwYl9Q0jEa"
   },
   "outputs": [],
   "source": [
    "# get u_net model\n",
    "u_net = get_unet()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "DkvwWi340jEc"
   },
   "outputs": [],
   "source": [
    "# fit model on train_data\n",
    "print(\"\\nTraining...\")\n",
    "u_net.fit(train_img,train_mask,batch_size=16,epochs=epochs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "SFWXcX1l0jEg"
   },
   "outputs": [],
   "source": [
    "print(\"Predicting\")\n",
    "# Predict on test data\n",
    "test_mask = u_net.predict(test_img,verbose=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "9th6UbBs0jEl"
   },
   "outputs": [],
   "source": [
    "# Create list of upsampled test masks\n",
    "test_mask_upsampled = []\n",
    "for i in range(len(test_mask)):\n",
    "    test_mask_upsampled.append(resize(np.squeeze(test_mask[i]),\n",
    "                                       (test_img_sizes[i][0],test_img_sizes[i][1]), \n",
    "                                       mode='constant', preserve_range=True))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "cdGSyY1o0jEo"
   },
   "outputs": [],
   "source": [
    "test_ids,rles = mask_to_rle(test_mask_upsampled)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "kcDPfblT0jEu"
   },
   "outputs": [],
   "source": [
    "# Create submission DataFrame\n",
    "sub = pd.DataFrame()\n",
    "sub['ImageId'] = test_ids\n",
    "sub['EncodedPixels'] = pd.Series(rles).apply(lambda x: ' '.join(str(y) for y in x))\n",
    "\n",
    "sub.to_csv('sub-dsbowl2018.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "UkQZeT3a0jEy"
   },
   "outputs": [],
   "source": [
    "# Code to download files from Google colab\n",
    "\n",
    "from google.colab import files\n",
    "\n",
    "files.download('sub-dsbowl2018.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    },
    "colab_type": "code",
    "collapsed": true,
    "id": "gx9wPiRh2DkK"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "default_view": {},
   "name": "Data Science Bowl 2018.ipynb",
   "provenance": [],
   "version": "0.3.2",
   "views": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
